# 访问网页的模块
import requests
# excel模块
import openpyxl
# 正则表达式的模块
import re
# Page to scrape: a one-day ("weather1d") forecast page on weather.com.cn.
url = "http://www.weather.com.cn/weather1d/101010100.shtml"
# Fetch the HTML of a page
def getHtml(url, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The response body as a string.

    Raises:
        requests.exceptions.Timeout: The server did not respond in time.
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status code.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server and the whole script hangs.
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page to the
    # parser, which would silently yield no data.
    resp.raise_for_status()
    # Decode the body as UTF-8 regardless of the encoding requests guessed
    # from the response headers.
    resp.encoding = "utf-8"
    return resp.text
# Extract the data rows from the page markup
def getData(html):
    """Extract ``[name, weather, wd, zs]`` rows from *html*.

    Each field is collected independently from ``<span>`` elements whose
    class is ``name``, ``weather``, ``wd`` or ``zs``; the four result lists
    are then combined position by position.

    Args:
        html: Page markup as a string.

    Returns:
        A list of four-element lists of strings. Its length is that of the
        shortest of the four per-field match lists (``zip`` semantics), so
        an input without matches yields ``[]``.
    """
    # name / weather / zs only accept runs of CJK characters (U+4E00-U+9FA5).
    city = re.findall(r'<span class="name">([\u4e00-\u9fa5]*)</span>', html)
    weather = re.findall(
        r'<span class="weather">([\u4e00-\u9fa5]*)</span>', html
    )
    # Non-greedy on purpose: a greedy ``.*`` runs to the LAST ``</span>`` on
    # the line and swallows the markup of any sibling spans that follow.
    wd = re.findall(r'<span class="wd">(.*?)</span>', html)
    zs = re.findall(r'<span class="zs">([\u4e00-\u9fa5]*)</span>', html)
    return [list(row) for row in zip(city, weather, wd, zs)]
# Download the page, extract the rows from it, and echo them to stdout.
# `data` is consumed by the spreadsheet-writing code further down.
html = getHtml(url)
data = getData(html)
print(data)

# Create a new workbook
workbook = openpyxl.Workbook()
# A freshly created Workbook already contains one empty worksheet. Rename and
# reuse it instead of calling create_sheet(), which would leave that blank
# default "Sheet" as the first tab of the saved file.
sheet = workbook.active
sheet.title = "景区天气"
# Each row handed to append() must be a list; passing a plain string raises
# an error (and the editor gives no hint about it here).
for item in data:
    sheet.append(item)
workbook.save("天气爬虫.xlsx")
workbook.close()

# Read the data back from the Excel file
workbook = openpyxl.load_workbook("天气爬虫.xlsx")
sheet = workbook['景区天气']
# One inner list per worksheet row, holding the value of every cell in it.
lst = [[cell.value for cell in row] for row in sheet.rows]
# Print the rows one per line.
for item in lst:
    print(item)

